{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_abbreviation_full_form(sentence):\n",
    "    pattern = r\"\\(([^)]+)\\)\"\n",
    "    matches = re.finditer(pattern, sentence)\n",
    "    abbr_dict = {} \n",
    "    \n",
    "    for match in matches:\n",
    "        abbr = match.group(1) \n",
    "        before_bracket_text = sentence[:match.start()].strip()  \n",
    "        words = before_bracket_text.split() \n",
    "        abbr_letters = re.sub(r\"[^A-Za-z]\", \"\", abbr).lower() \n",
    "        full_form_candidates = []\n",
    "        \n",
    "        for i in range(len(words) - 1, -1, -1): \n",
    "            word = re.sub(r\"[^A-Za-z]\", \"\", words[i]).lower() \n",
    "            full_form_candidates.append(words[i])\n",
    "            candidate_letters = \"\".join([re.sub(r\"[^A-Za-z]\", \"\", w).lower() for w in full_form_candidates[::-1]])\n",
    "            if is_complete_match(abbr_letters, candidate_letters):\n",
    "                abbr_dict[abbr] = \" \".join(full_form_candidates[::-1])\n",
    "                break \n",
    "            \n",
    "    return abbr_dict\n",
    "\n",
    "\n",
    "def is_complete_match(sub, full):\n",
    "    if not sub:\n",
    "        return False\n",
    "    it = iter(full)\n",
    "    return all(char in it for char in sub) and full.startswith(sub[0])\n",
    "\n",
    "def process_q_row(qa_list):\n",
    "    abbr = find_abbreviation_full_form(str(qa_list))\n",
    "    for i in range(len(qa_list)):\n",
    "        if abbr:\n",
    "            for abbr_key, full_form in abbr.items():\n",
    "                pattern = r'\\b'+ re.escape(abbr_key)+r'\\b'\n",
    "                qa_list[i] = re.sub(pattern, re.escape(full_form), str(qa_list[i]))\n",
    "    return qa_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  Dataset\n",
    "df_all = pd.read_json('../QA_data/inf_processed_all.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Process the Q&A \n",
    "df_all['New Q&A'] = df_all.loc[:,\"Q&A\"].apply(process_q_row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[\"{'Q': 'What is the purpose of the study presented in the paper?', 'A': 'The purpose of the study is to evaluate the potential of a novel alcohol sensor based on nanosized SrCO3 for the detection of ethanol vapor.'}\",\n",
       " \"{'Q': 'What is the mechanism of chemiluminescence (chemiluminescence) from the catalytic oxidation of ethanol on SrCO3?', 'A': 'The chemiluminescence mechanism involves the oxidation of ethanol to produce acetaldehyde and ethylene, which are then converted to carbon dioxide and water. The chemiluminescence is attributed to the oxidation of these intermediates by SrCO3.'}\",\n",
       " \"{'Q': 'What are the advantages of using nanosized SrCO3 as a sensing material?', 'A': 'The advantages of using nanosized SrCO3 include its high activity, good selectivity, and small size, which make it suitable for detecting small concentrations of gas molecules.'}\",\n",
       " \"{'Q': 'How does the temperature dependence of the chemiluminescence intensity affect the performance of the sensor?', 'A': 'The temperature dependence of the chemiluminescence intensity affects the performance of the sensor by increasing the conversion of ethanol to ethylene at higher temperatures, leading to a higher chemiluminescence intensity.'}\",\n",
       " \"{'Q': 'What is the role of flow rate in the chemiluminescence detection system?', 'A': 'The flow rate affects the chemiluminescence detection system by controlling the diffusion of ethanol vapor to the SrCO3 sensor, which in turn affects the conversion of ethanol to ethylene and the chemiluminescence intensity.'}\",\n",
       " \"{'Q': 'What is the detection limit of the sensor?', 'A': 'The detection limit of the sensor is 2.1 ppm, which is defined as three times the standard deviation of the background noise.'}\",\n",
       " \"{'Q': 'How does the sensor respond to different concentrations of ethanol vapor?', 'A': 'The sensor responds to different concentrations of ethanol vapor by producing a linear calibration curve with a detection limit of 2.1 ppm.'}\",\n",
       " \"{'Q': 'What is the selectivity of the sensor to ethanol vapor?', 'A': 'The sensor exhibits a high selectivity to ethanol vapor, with no response observed for gasoline, ammonia, or hydrogen.'}\",\n",
       " \"{'Q': 'How does the sensor maintain its stability and durability over time?', 'A': 'The sensor maintains its stability and durability by using a simple chemical precipitation method for the synthesis of nanosized SrCO3 and by operating at a temperature of 380 °C.'}\",\n",
       " \"{'Q': 'What is the potential application of the SrCO3 sensor in real-world scenarios?', 'A': 'The SrCO3 sensor has potential applications in breath analyzers, industrial monitoring, and environmental processes, where the detection of ethanol vapor is critical.'}\"]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all['New Q&A'][1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save dataframe to csv file\n",
    "df_all.to_csv('abbr_complement.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_all.to_json('../QA_data/inf_processed_all_new.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_test = df_all.head(10)\n",
    "df_test.to_csv('test.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "comp9021",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
